# -*- coding: utf-8 -*-
from scrapy import Request
from scrapy.exceptions import IgnoreRequest
from scrapy.utils.project import get_project_settings
from scrapy_redis.spiders import RedisSpider
from scrapy_redis.utils import bytes_to_str
from zc_core.dao.batch_dao import BatchDao
from zc_core.model.items import Box
from zc_core.util.http_util import retry_request
from changsha.rules import *
from zc_core.spiders.base import BaseSpider


class FullSpider(RedisSpider):
    """Full-crawl spider.

    Pulls per-(supplier, category, page) task records from a Redis list and
    fetches the corresponding paginated SKU listings, emitting ``Box`` items
    for the downstream pipeline.
    """

    name = 'full'
    # SKU list endpoint; placeholders: shopInfoId, productTypeId, currentPage.
    sku_list_url = 'http://hunan.gpmart.cn/frontBrands/getNewBrandsAndProductInfos.action?level=3&orderBy=normal&shopInfoId={}&productTypeId={}&brandParams=&brandId=&currentPage={}'

    def __init__(self, batchNo=None, *args, **kwargs):
        """Initialize the spider for one crawl batch.

        :param batchNo: optional batch number; when omitted, one is derived
            from the current time via ``time_to_batch_no``.
        """
        if not batchNo:
            self.batch_no = time_to_batch_no(datetime.now())
        else:
            self.batch_no = int(batchNo)
        # Create the batch record up front so downstream stages can attach to it.
        BatchDao().create_batch(self.batch_no)
        settings = get_project_settings()
        self.suppliers = settings.getdict('SUPPLIERS', {})
        # Redis key the task feed is consumed from (scrapy_redis start source).
        self.redis_key = '{}:task:item:{}'.format(settings.get('BOT_NAME'), self.batch_no)
        # scrapy_redis scheduler/dupefilter configuration, namespaced per batch.
        # NOTE(review): Scrapy reads `custom_settings` as a CLASS attribute via
        # `update_settings()` before the spider is instantiated, so assigning it
        # on `self` here likely has no effect — verify these settings are also
        # applied in settings.py or at class level.
        self.custom_settings = {
            'DUPEFILTER_CLASS': 'scrapy_redis.dupefilter.RFPDupeFilter',
            'SCHEDULER': 'scrapy_redis.scheduler.Scheduler',
            'SCHEDULER_PERSIST': True,
            'SCHEDULER_IDLE_BEFORE_CLOSE': 5,
            'SCHEDULER_QUEUE_KEY': '{}:%(spider)s:requests:{}'.format(settings.get('BOT_NAME'), self.batch_no),
            'SCHEDULER_DUPEFILTER_KEY': '{}:%(spider)s:dupefilter:{}'.format(settings.get('BOT_NAME'), self.batch_no),
        }
        super(FullSpider, self).__init__(*args, **kwargs)

    def make_request_from_data(self, data):
        """Build a POST request from one raw Redis task record.

        The record is a JSON object expected to carry ``catId``, ``spId`` and
        ``page``; returns ``None`` (after logging) when required fields are
        missing, which scrapy_redis treats as "no request".
        """
        data = bytes_to_str(data, self.redis_encoding)
        row = json.loads(data)
        cat_id = row.get('catId')
        sp_id = row.get('spId')
        page = row.get('page')

        # Fetch one page of the SKU list for this supplier/category.
        if sp_id and page:
            return Request(
                method='POST',
                url=self.sku_list_url.format(sp_id, cat_id, page),
                callback=self.parse_item,
                errback=self.error_back,
                meta={
                    'reqType': 'item',
                    'batchNo': self.batch_no,
                    'catalogId': cat_id,
                    'supplierId': sp_id,
                    'page': page
                },
                priority=200,
                # Pages are driven by the task queue; filtering is handled there.
                dont_filter=True
            )
        else:
            self.logger.error('参数异常：%s' % data)

    def parse_item(self, response):
        """Parse one SKU-list page and emit sku/item boxes.

        An empty ``items`` result is treated as the end of pagination for
        this supplier/category.
        """
        meta = response.meta
        page = meta.get('page')
        cat_id = meta.get('catalogId')
        sp_id = meta.get('supplierId')
        skus, items = parse_item_data(response)
        if skus:
            self.logger.info('清单: sp=%s, cat=%s, page=%s, count=%s' % (sp_id, cat_id, page, len(skus)))
            yield Box('sku', self.batch_no, skus)
        if items:
            # BUG FIX: previously logged len(skus) here instead of len(items).
            self.logger.info('商品: sp=%s, cat=%s, page=%s, count=%s' % (sp_id, cat_id, page, len(items)))
            yield Box('item', self.batch_no, items)
        else:
            # BUG FIX: removed stray ']' that had no matching '[' in the message.
            self.logger.info('分页完成: sp=%s, cat=%s, page=%s' % (sp_id, cat_id, page))

    def error_back(self, e):
        """Errback for failed requests (``e`` is a Twisted Failure).

        IgnoreRequest failures are logged at info level; anything else is
        logged and the request is retried via ``retry_request``.
        """
        if e.type and e.type == IgnoreRequest:
            self.logger.info(e.value)
        else:
            if e.value and e.value.response:
                meta = e.request.meta
                self.logger.error('响应异常: [%s][%s] -> [%s]' % (meta.get('proxy', ''), e.value.response, meta))
                yield retry_request(e.request)
            elif e.request:
                self.logger.error('请求异常: [%s][%s] -> [%s]' % (str(type(e)), e.request.url, e.request.meta))
                yield retry_request(e.request)
            else:
                self.logger.error('未知异常: %s' % e)